\(安裝所需要的套件\)
# Point the session at the folder that holds the CSV inputs read further down.
setwd("~/Desktop/R/data")
# Install pacman only when it is missing. requireNamespace() tests for the
# package without attaching it, which is enough because every use below is
# namespaced (pacman::).
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
# p_load() installs (when needed) and attaches each listed package, in this
# order. caret was listed twice in the original call; once is sufficient.
pacman::p_load(
  glmnet, caret, GGally, tidyverse, PerformanceAnalytics,
  plotly, rpart, rpart.plot, corrplot
)
\(匯入所需要的資料,包含17-18年球員薪資以及球員年度的數據(16-17)\)
# Read the two raw inputs from the data folder:
#   salary - salary table (one of its columns is season17_18)
#   data   - season statistics table (filtered by Year later on)
setwd("~/Desktop/R/data")
salary <- read.csv(file = "NBA_salary1718.csv")
data <- read.csv(file = "Seasons_Stats.csv")
\(處理salary中有重複值的情況\)
# Flag every repeated occurrence of a player name (the first one is kept).
index <- duplicated(salary[["Player"]])
# Keep the unflagged rows and only the columns from X through season17_18.
salary1 <- select(salary[!index, ], X:season17_18)
# Sanity check: prints one logical per remaining row; all should be FALSE.
duplicated(salary1[["Player"]])
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [56] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [78] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [89] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [100] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [122] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [144] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [155] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [166] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [177] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [188] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [199] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [210] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [232] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [243] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [254] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [276] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [287] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [298] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [309] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [320] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [331] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [342] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [353] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [364] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [375] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [386] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [397] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [408] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [419] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [430] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [441] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [452] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [463] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [474] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [485] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [496] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [507] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [518] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [529] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
\(將球員年度資料進行篩選(16-17),並創建新的變數,最後將兩筆資料進行整合\)
# Build the per-player stats table:
#   1. keep rows with Year >= 2017,
#   2. keep the identifying columns (Year through G) plus MP, PER and the
#      box-score columns FG through PTS,
#   3. keep only the first row found for each player,
#   4. append per-game averages (season total divided by games played).
# The order of the new columns matters: later code selects the range G:SPG.
data16_17 <- data %>%
  filter(Year >= 2017) %>%
  select(Year:G, MP, PER, FG:PTS) %>%
  distinct(Player, .keep_all = TRUE) %>%
  mutate(
    MPG = MP / G, PPG = PTS / G, APG = AST / G, RPG = TRB / G,
    TOPG = TOV / G, BPG = BLK / G, SPG = STL / G
  )

# Inner join on player name: only players present in both tables survive.
data17_salary <- merge(salary1, data16_17, by = "Player")
\(對變數進行篩選並處理遺漏值\)
# Drop two columns by position. Removing 8 and then 2 is the same as removing
# the original columns 2 and 8 in one step.
# NOTE(review): presumably these are the salary file's row-id column and the
# duplicated team column produced by the merge - confirm against the data.
data17_salary <- data17_salary[-c(2, 8)]

# After the drop, column 2 becomes "Team" and column 3 becomes "salary17_18".
names(data17_salary)[2:3] <- c("Team", "salary17_18")

# Replace missing values in columns 15 and 22 with 0.
# NOTE(review): by position these appear to be X3P. and FT. (percentages that
# are undefined with zero attempts) - verify before relying on it.
na.rows1 <- is.na(data17_salary[[15]])
na.rows2 <- is.na(data17_salary[[22]])
data17_salary[na.rows1, 15] <- 0
data17_salary[na.rows2, 22] <- 0

# Count of NA cells remaining in the whole table.
sum(is.na(data17_salary))
## [1] 0
\(進行變數之間的相關分析\)
# Correlation plot of the numeric stat columns (G through SPG).
# Only the upper triangle is drawn, with circles encoding each correlation.
data17_salary %>%
  select(G:SPG) %>%
  cor(use = "complete.obs") %>%
  corrplot(method = "circle", type = "upper")
\(將各變數之間的相關係數轉成矩陣形式\)
# Correlation matrix of salary together with every stat column. cor() already
# returns a matrix, so no extra conversion is needed.
data17_salary_cor <- cor(select(data17_salary, salary17_18, G:SPG))
# Zero the lower triangle so each pair shows up once (upper triangle only).
data17_salary_cor[lower.tri(data17_salary_cor)] <- 0
data17_salary_cor
## salary17_18 G MP PER FG FGA
## salary17_18 1 0.4037219 0.6559081 0.5489080 0.7101613 0.6883902
## G 0 1.0000000 0.8383092 0.3130672 0.6639126 0.6708012
## MP 0 0.0000000 1.0000000 0.4829999 0.8947597 0.9001646
## PER 0 0.0000000 0.0000000 1.0000000 0.6793414 0.6144360
## FG 0 0.0000000 0.0000000 0.0000000 1.0000000 0.9856941
## FGA 0 0.0000000 0.0000000 0.0000000 0.0000000 1.0000000
## FG. 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## X3P 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## X3PA 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## X3P. 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## X2P 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## X2PA 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## X2P. 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## eFG. 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## FT 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## FTA 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## FT. 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## ORB 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## DRB 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## TRB 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## AST 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## STL 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## BLK 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## TOV 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## PF 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## PTS 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## MPG 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## PPG 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## APG 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## RPG 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## TOPG 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## BPG 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## SPG 0 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## FG. X3P X3PA X3P. X2P
## salary17_18 0.1953613 0.4772669 0.4702018 0.07586777 0.66840731
## G 0.1796135 0.4928624 0.5200902 0.22029991 0.60617723
## MP 0.1667988 0.6570707 0.6793166 0.26066201 0.81981918
## PER 0.5758723 0.2786068 0.2661435 0.09576162 0.71069185
## FG 0.2190771 0.6451252 0.6575680 0.22441080 0.95199366
## FGA 0.1066086 0.7178409 0.7382435 0.28488135 0.90554442
## FG. 1.0000000 -0.1689054 -0.1999208 -0.20688844 0.33285275
## X3P 0.0000000 1.0000000 0.9910310 0.50848781 0.38025762
## X3PA 0.0000000 0.0000000 1.0000000 0.49659395 0.39891237
## X3P. 0.0000000 0.0000000 0.0000000 1.00000000 0.06791994
## X2P 0.0000000 0.0000000 0.0000000 0.00000000 1.00000000
## X2PA 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## X2P. 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## eFG. 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## FT 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## FTA 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## FT. 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## ORB 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## DRB 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## TRB 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## AST 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## STL 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## BLK 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## TOV 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## PF 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## PTS 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## MPG 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## PPG 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## APG 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## RPG 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## TOPG 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## BPG 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## SPG 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000
## X2PA X2P. eFG. FT FTA
## salary17_18 0.6611878 0.14524581 0.2098772 0.6630335 0.6805263
## G 0.6106818 0.18536868 0.2699956 0.4846764 0.5090092
## MP 0.8295912 0.16808080 0.2531583 0.7281117 0.7458603
## PER 0.6735278 0.41460517 0.5363623 0.6881910 0.7032672
## FG 0.9552715 0.19066729 0.2484780 0.8683891 0.8738987
## FGA 0.9305124 0.10486715 0.1727023 0.8683017 0.8596168
## FG. 0.2504885 0.83610727 0.8648042 0.1412742 0.2010573
## X3P 0.4175963 -0.05140754 0.1594384 0.5483900 0.4901510
## X3PA 0.4398896 -0.05823271 0.1165903 0.5723125 0.5157314
## X3P. 0.1096394 -0.16024707 0.1678793 0.1589913 0.1046953
## X2P 0.9890084 0.25138994 0.2368944 0.8314403 0.8614421
## X2PA 1.0000000 0.17123587 0.1666233 0.8452715 0.8644312
## X2P. 0.0000000 1.00000000 0.7662018 0.1180719 0.1606135
## eFG. 0.0000000 0.00000000 1.0000000 0.1382029 0.1710598
## FT 0.0000000 0.00000000 0.0000000 1.0000000 0.9864574
## FTA 0.0000000 0.00000000 0.0000000 0.0000000 1.0000000
## FT. 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## ORB 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## DRB 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## TRB 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## AST 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## STL 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## BLK 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## TOV 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## PF 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## PTS 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## MPG 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## PPG 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## APG 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## RPG 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## TOPG 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## BPG 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## SPG 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## FT. ORB DRB TRB AST
## salary17_18 0.17485058 0.41888306 0.619828459 0.58439551 0.529849060
## G 0.27547335 0.48064251 0.645937728 0.62210747 0.476723500
## MP 0.28649121 0.52201343 0.772935702 0.72864926 0.669840018
## PER 0.05704618 0.49921628 0.586027833 0.58315441 0.484403556
## FG 0.28626348 0.50043728 0.743774007 0.70059995 0.681955374
## FGA 0.33134506 0.40350908 0.689466540 0.63151890 0.715376588
## FG. -0.21313996 0.51194753 0.355968947 0.41617827 -0.013145446
## X3P 0.40868783 -0.08016395 0.291179440 0.19232423 0.538549028
## X3PA 0.40918350 -0.07004892 0.314336426 0.21251451 0.566904322
## X3P. 0.29008737 -0.26706339 -0.003110012 -0.08158491 0.248216688
## X2P 0.18277342 0.63787345 0.783647546 0.77099239 0.609713376
## X2PA 0.21896197 0.57525350 0.747252941 0.72539096 0.644608739
## X2P. -0.10447410 0.36761224 0.293348560 0.32685458 0.001658585
## eFG. -0.08752166 0.32534592 0.302205008 0.32088148 0.034449851
## FT 0.28880454 0.38307955 0.640947577 0.58944252 0.711279295
## FTA 0.21895010 0.48009560 0.708277736 0.66821541 0.691057409
## FT. 1.00000000 -0.12252171 0.072456836 0.01740922 0.269216934
## ORB 0.00000000 1.00000000 0.819207044 0.90488119 0.126165942
## DRB 0.00000000 0.00000000 1.000000000 0.98540258 0.436216393
## TRB 0.00000000 0.00000000 0.000000000 1.00000000 0.361222336
## AST 0.00000000 0.00000000 0.000000000 0.00000000 1.000000000
## STL 0.00000000 0.00000000 0.000000000 0.00000000 0.000000000
## BLK 0.00000000 0.00000000 0.000000000 0.00000000 0.000000000
## TOV 0.00000000 0.00000000 0.000000000 0.00000000 0.000000000
## PF 0.00000000 0.00000000 0.000000000 0.00000000 0.000000000
## PTS 0.00000000 0.00000000 0.000000000 0.00000000 0.000000000
## MPG 0.00000000 0.00000000 0.000000000 0.00000000 0.000000000
## PPG 0.00000000 0.00000000 0.000000000 0.00000000 0.000000000
## APG 0.00000000 0.00000000 0.000000000 0.00000000 0.000000000
## RPG 0.00000000 0.00000000 0.000000000 0.00000000 0.000000000
## TOPG 0.00000000 0.00000000 0.000000000 0.00000000 0.000000000
## BPG 0.00000000 0.00000000 0.000000000 0.00000000 0.000000000
## SPG 0.00000000 0.00000000 0.000000000 0.00000000 0.000000000
## STL BLK TOV PF PTS
## salary17_18 0.56379364 0.405987556 0.62845852 0.48338156 0.7157741
## G 0.66829795 0.449877191 0.60270921 0.81570756 0.6433520
## MP 0.82684636 0.502335436 0.79959074 0.83293647 0.8835567
## PER 0.45500001 0.463567489 0.59107197 0.41549488 0.6762233
## FG 0.72479142 0.481591005 0.85959439 0.70347167 0.9896152
## FGA 0.74298486 0.400312543 0.86985687 0.68214816 0.9879763
## FG. 0.08127035 0.424023097 0.11725625 0.29000003 0.1688830
## X3P 0.54678772 0.003637332 0.55989387 0.39448719 0.7045158
## X3PA 0.57524434 0.015822964 0.59372429 0.41713013 0.7178639
## X3P. 0.23889955 -0.158385446 0.18359323 0.09198189 0.2566495
## X2P 0.65826375 0.581486954 0.81618586 0.69347444 0.9156292
## X2PA 0.67683696 0.524370807 0.83571572 0.68169432 0.9255702
## X2P. 0.10047801 0.327099310 0.09798928 0.24374391 0.1571218
## eFG. 0.14291659 0.300268217 0.12371712 0.28942785 0.2278280
## FT 0.64726111 0.391791262 0.85829974 0.55663061 0.9149339
## FTA 0.65849693 0.470679553 0.86692516 0.60430541 0.9088559
## FT. 0.22049926 -0.064421288 0.24550816 0.15073237 0.3190164
## ORB 0.37103912 0.744625701 0.40863536 0.66949249 0.4357990
## DRB 0.62431281 0.739224808 0.67973712 0.78557804 0.7126652
## TRB 0.57352164 0.769709675 0.62581920 0.78181122 0.6583227
## AST 0.73708894 0.145540378 0.86938067 0.46560387 0.7140799
## STL 1.00000000 0.363590600 0.75965809 0.68426008 0.7307398
## BLK 0.00000000 1.000000000 0.39358301 0.64903204 0.4342705
## TOV 0.00000000 0.000000000 1.00000000 0.67961057 0.8772029
## PF 0.00000000 0.000000000 0.00000000 1.00000000 0.6766028
## PTS 0.00000000 0.000000000 0.00000000 0.00000000 1.0000000
## MPG 0.00000000 0.000000000 0.00000000 0.00000000 0.0000000
## PPG 0.00000000 0.000000000 0.00000000 0.00000000 0.0000000
## APG 0.00000000 0.000000000 0.00000000 0.00000000 0.0000000
## RPG 0.00000000 0.000000000 0.00000000 0.00000000 0.0000000
## TOPG 0.00000000 0.000000000 0.00000000 0.00000000 0.0000000
## BPG 0.00000000 0.000000000 0.00000000 0.00000000 0.0000000
## SPG 0.00000000 0.000000000 0.00000000 0.00000000 0.0000000
## MPG PPG APG RPG TOPG
## salary17_18 0.6756657 0.7082822 0.492047480 0.55651955 0.59115458
## G 0.6000674 0.4657513 0.296635795 0.41090577 0.36665082
## MP 0.9026938 0.7807740 0.542857184 0.57866873 0.64067968
## PER 0.4983711 0.7173900 0.464033303 0.60553942 0.56888971
## FG 0.8430015 0.9359923 0.585507791 0.59299219 0.74969715
## FGA 0.8498191 0.9336825 0.620109243 0.51560975 0.76031813
## FG. 0.1541795 0.1689120 -0.054243558 0.45634656 0.08949098
## X3P 0.6285275 0.6671983 0.473201002 0.07908925 0.47341425
## X3PA 0.6425828 0.6732768 0.495159988 0.09208939 0.50032179
## X3P. 0.2806115 0.2834371 0.269886231 -0.16102889 0.16911185
## X2P 0.7686038 0.8656719 0.519148785 0.68610421 0.71780717
## X2PA 0.7825088 0.8774953 0.556728981 0.63646222 0.74059552
## X2P. 0.1513899 0.1456249 -0.059501254 0.32920693 0.05463311
## eFG. 0.2438808 0.2309847 -0.004166088 0.31826360 0.07613972
## FT 0.7058542 0.8891578 0.638226170 0.51921441 0.79039377
## FTA 0.7163354 0.8767712 0.612927435 0.59812492 0.79349957
## FT. 0.2739134 0.3192902 0.242115046 -0.05936803 0.23327341
## ORB 0.4336674 0.3559083 0.030434803 0.87337833 0.30350249
## DRB 0.6898317 0.6308532 0.325756444 0.91792984 0.55707218
## TRB 0.6407423 0.5738846 0.250818881 0.94056859 0.50356604
## AST 0.6438401 0.6751446 0.951591658 0.26454986 0.80943544
## STL 0.7613074 0.6534028 0.641699122 0.45546886 0.64502757
## BLK 0.4264490 0.3704964 0.060166513 0.72868249 0.30733994
## TOV 0.7503527 0.8234270 0.786690270 0.52845627 0.92136316
## PF 0.7026597 0.5616084 0.332475677 0.65614118 0.52425083
## PTS 0.8381629 0.9514283 0.620743879 0.55204455 0.77298599
## MPG 1.0000000 0.8595073 0.622682536 0.60489758 0.73232360
## PPG 0.0000000 1.0000000 0.656454051 0.54795146 0.82235449
## APG 0.0000000 0.0000000 1.000000000 0.21133896 0.83380913
## RPG 0.0000000 0.0000000 0.000000000 1.00000000 0.50023886
## TOPG 0.0000000 0.0000000 0.000000000 0.00000000 1.00000000
## BPG 0.0000000 0.0000000 0.000000000 0.00000000 0.00000000
## SPG 0.0000000 0.0000000 0.000000000 0.00000000 0.00000000
## BPG SPG
## salary17_18 0.33502959 0.51540631
## G 0.25684256 0.43116922
## MP 0.34310234 0.66748946
## PER 0.46326483 0.42819189
## FG 0.35908904 0.60230578
## FGA 0.27625971 0.62203485
## FG. 0.44205979 0.04794435
## X3P -0.08991718 0.46275050
## X3PA -0.08392790 0.48448546
## X3P. -0.22184596 0.21142197
## X2P 0.47068528 0.54366886
## X2PA 0.41337409 0.56508875
## X2P. 0.32957208 0.05746270
## eFG. 0.28965142 0.09951286
## FT 0.30620161 0.56514170
## FTA 0.37979142 0.57004220
## FT. -0.16634858 0.17808594
## ORB 0.65794866 0.25941260
## DRB 0.62046343 0.49316191
## TRB 0.65583236 0.44304238
## AST 0.05157773 0.66861909
## STL 0.24328392 0.91915520
## BLK 0.93722527 0.26649034
## TOV 0.28857064 0.65887316
## PF 0.51039474 0.52357834
## PTS 0.31614367 0.61455044
## MPG 0.34634751 0.74063958
## PPG 0.31441631 0.62717303
## APG 0.00412646 0.66288102
## RPG 0.70586878 0.40577648
## TOPG 0.27284083 0.65899643
## BPG 1.00000000 0.20828241
## SPG 0.00000000 1.00000000
\(資料視覺化\)
# Interactive scatter of salary (x) against points per game (y), coloured by
# team. The trace type and mode are stated explicitly so plotly does not have
# to guess them (it otherwise prints "No trace type specified" /
# "No scatter mode specifed" messages); the chart itself is unchanged.
# The hover label shows player, formatted salary, PPG and team.
plot_ly(
  data = data17_salary,
  x = ~salary17_18,
  y = ~PPG,
  color = ~Team,
  type = "scatter",
  mode = "markers",
  hoverinfo = "text",
  text = ~paste("Player: ", Player,
                "<br>Salary: ", format(salary17_18, big.mark = ","), "$",
                "<br>PPG: ", round(PPG, digits = 3),
                "<br>Team: ", Team)
) %>%
  layout(
    title = "Salary vs Point Per Game",
    xaxis = list(title = "Salary USD"),
    yaxis = list(title = "Point per Game")
  )
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Interactive scatter of salary (x) against turnovers per game (y), coloured
# by team. The trace type and mode are stated explicitly so plotly does not
# have to guess them (it otherwise prints "No trace type specified" /
# "No scatter mode specifed" messages); the chart itself is unchanged.
# The hover label shows player, formatted salary, TOPG and team.
plot_ly(
  data = data17_salary,
  x = ~salary17_18,
  y = ~TOPG,
  color = ~Team,
  type = "scatter",
  mode = "markers",
  hoverinfo = "text",
  text = ~paste("Player: ", Player,
                "<br>Salary: ", format(salary17_18, big.mark = ","), "$",
                "<br>TOPG: ", round(TOPG, digits = 2),
                "<br>Team: ", Team)
) %>%
  layout(
    title = "Salary vs Turnover Per Game",
    xaxis = list(title = "Salary USD"),
    yaxis = list(title = "Turnover per Game")
  )
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
\(將資料分成Train和Test\)
# Random 50/50 split (the training half is rounded up on odd row counts).
# The seed makes the split - and every model and metric derived from it -
# reproducible from run to run; without it each run draws a different sample.
set.seed(2017)
# seq_len() is used instead of 1:nrow() so an empty table yields an empty
# index set rather than c(1, 0).
train.ind <- sample(
  seq_len(nrow(data17_salary)),
  size = ceiling(nrow(data17_salary) / 2)
)
train <- data17_salary[train.ind, ]
test <- data17_salary[-train.ind, ]
\(進行模型分析\)
\(LM model\)
# Ordinary least squares on the training half: salary explained by age and a
# hand-picked set of box-score statistics.
# NOTE(review): STL and BLK are season totals while the other stat terms are
# per-game averages (SPG and BPG also exist) - confirm this mix is intended.
model.lm <- lm(
  formula = salary17_18 ~ Age + PPG + TOPG + RPG + STL + BLK + APG,
  data = train
)
summary(model.lm)
##
## Call:
## lm(formula = salary17_18 ~ Age + PPG + TOPG + RPG + STL + BLK +
## APG, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14261932 -2784250 -191145 2184808 18125564
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8457699.1 2171594.3 -3.895 0.000134 ***
## Age 261326.6 82643.4 3.162 0.001813 **
## PPG 687006.4 104405.9 6.580 4.11e-10 ***
## TOPG -819568.5 1141842.5 -0.718 0.473751
## RPG 723807.7 227281.5 3.185 0.001684 **
## STL 731.1 15566.8 0.047 0.962587
## BLK 18992.3 16910.9 1.123 0.262763
## APG 348916.8 434040.5 0.804 0.422430
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4888000 on 198 degrees of freedom
## Multiple R-squared: 0.6153, Adjusted R-squared: 0.6017
## F-statistic: 45.23 on 7 and 198 DF, p-value: < 2.2e-16
\(設定成test和train矩陣\)
# Candidate penalties for glmnet: 100 values spaced evenly on the log10 scale
# from 1e7 down to 1e4.
grid <- 10^seq(7, 4, length.out = 100)

# Design matrices for glmnet, built from the stat columns G through SPG.
# model.matrix() adds its own "(Intercept)" column, which is why the printed
# coefficient tables list the intercept twice.
train.mat <- model.matrix(
  salary17_18 ~ .,
  data = select(train, salary17_18, G:SPG)
)
test.mat <- model.matrix(
  salary17_18 ~ .,
  data = select(test, salary17_18, G:SPG)
)
\(Ridge,\alpha=0\)
# Ridge regression (alpha = 0) with the penalty chosen by cross-validation
# over `grid`. cv.glmnet() assigns observations to folds at random, so the
# seed is fixed to make the selected lambda reproducible.
set.seed(1)
model.ridge <- cv.glmnet(
  x = train.mat,
  y = train[, "salary17_18"],
  alpha = 0,
  lambda = grid,
  thresh = 1e-12
)
# Penalty with the lowest cross-validated error; used for prediction later.
lambda.best2 <- model.ridge$lambda.min
lambda.best2
## [1] 4328761
\(檢視被挑選解釋變數的係數\)
# Show the coefficients at lambda.min, the penalty the ridge predictions use.
# Without `s`, coef() on a cv.glmnet fit reports the more heavily penalised
# lambda.1se solution, which is not the model evaluated later.
coef(model.ridge, s = "lambda.min")
## 34 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -1445531.7927
## (Intercept) .
## G -7744.7517
## MP 261.0833
## PER 56658.7792
## FG 1864.8485
## FGA 767.4638
## FG. 180644.2758
## X3P 6335.1103
## X3PA 2041.9192
## X3P. -1248365.4272
## X2P 1757.3056
## X2PA 782.4819
## X2P. -268158.6926
## eFG. 741767.0987
## FT 2669.1900
## FTA 2241.2296
## FT. 1224432.5123
## ORB 3732.0297
## DRB 2809.2370
## TRB 1933.8072
## AST 838.8499
## STL 1217.5382
## BLK 5871.5554
## TOV 1917.9837
## PF -1774.3600
## PTS 697.7524
## MPG 38527.6841
## PPG 65035.6400
## APG 79522.5522
## RPG 207685.8636
## TOPG 217030.1600
## BPG 463148.3177
## SPG 295552.0561
\(Lasso,\alpha=1\)
# Lasso regression (alpha = 1) with the penalty chosen by cross-validation
# over `grid`. cv.glmnet() assigns observations to folds at random, so the
# seed is fixed to make the selected lambda reproducible.
set.seed(1)
model.lasso <- cv.glmnet(
  x = train.mat,
  y = train[, "salary17_18"],
  alpha = 1,
  lambda = grid,
  thresh = 1e-12
)
# Penalty with the lowest cross-validated error; used for prediction later.
lambda.best1 <- model.lasso$lambda.min
lambda.best1
## [1] 284803.6
\(檢視被挑選解釋變數的係數\)
# Show the coefficients at lambda.min, the penalty the lasso predictions use.
# Without `s`, coef() on a cv.glmnet fit reports the sparser lambda.1se
# solution, which is not the model evaluated later.
coef(model.lasso, s = "lambda.min")
## 34 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 1488482.320
## (Intercept) .
## G .
## MP .
## PER .
## FG .
## FGA .
## FG. .
## X3P .
## X3PA .
## X3P. .
## X2P .
## X2PA .
## X2P. .
## eFG. .
## FT .
## FTA .
## FT. .
## ORB .
## DRB 9893.532
## TRB .
## AST .
## STL .
## BLK .
## TOV .
## PF .
## PTS 1369.086
## MPG .
## PPG 378912.030
## APG .
## RPG .
## TOPG .
## BPG .
## SPG .
\(決策樹\)
# Regression tree for salary on a hand-picked subset of the stat columns.
# The seed is set immediately before fitting so the result is repeatable.
set.seed(2)
model.cart <- rpart(
  salary17_18 ~ PER + FG + FGA + X2PA + eFG. + ORB + DRB + TRB + AST + STL +
    BLK + TOV + PF + PTS + MPG + PPG + APG + RPG + TOPG + BPG + SPG,
  data = train
)
# Print the fitted splits as text.
model.cart
## n= 206
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 206 1.229781e+16 7638747
## 2) PTS< 1018.5 166 3.725644e+15 4969364
## 4) MPG< 19.85147 93 7.711351e+14 2958377
## 8) TRB< 73.5 38 1.753339e+13 1334196 *
## 9) TRB>=73.5 55 5.841005e+14 4080539 *
## 5) MPG>=19.85147 73 2.099272e+15 7531305
## 10) MPG< 27.39211 50 1.384692e+15 6349789
## 20) PF>=135.5 22 2.287109e+14 4062707 *
## 21) PF< 135.5 28 9.504879e+14 8146781
## 42) TRB< 241 18 5.462016e+14 6260651 *
## 43) TRB>=241 10 2.249889e+14 11541820 *
## 11) MPG>=27.39211 23 4.930441e+14 10099820
## 22) RPG< 6.240067 16 3.032249e+14 8282140 *
## 23) RPG>=6.240067 7 1.612573e+13 14254510 *
## 3) PTS>=1018.5 40 2.480481e+15 18716690
## 6) RPG< 3.919516 13 5.055235e+14 12524480 *
## 7) RPG>=3.919516 27 1.236490e+15 21698120
## 14) MPG< 31.57253 7 3.658076e+14 15247690 *
## 15) MPG>=31.57253 20 4.774866e+14 23955770
## 30) ORB< 60.5 9 1.081093e+14 20790500 *
## 31) ORB>=60.5 11 2.054305e+14 26545550 *
# Draw the fitted tree: full factor level names (faclen = 0), leaves aligned
# at the bottom of the plot, and no extra per-node information (extra = 0).
prp(model.cart, faclen = 0, fallen.leaves = TRUE, extra = 0)
\(Random Forest\)
# Resampling scheme for caret: 5-fold cross-validation, no progress output.
myControl <- trainControl(method = "cv", number = 5, verboseIter = FALSE)

# Random forest (ranger engine) tuned by caret. Every column except the
# identifiers Player, Team and Year is a predictor; tuneLength = 3 asks caret
# to try three candidate values per tuning parameter, and impurity-based
# variable importance is recorded.
set.seed(183)
model.ranger <- train(
  salary17_18 ~ . - Player - Team - Year,
  data = train,
  method = "ranger",
  trControl = myControl,
  tuneLength = 3,
  importance = "impurity"
)
model.ranger
## Random Forest
##
## 206 samples
## 37 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 165, 165, 165, 164, 165
## Resampling results across tuning parameters:
##
## mtry splitrule RMSE Rsquared MAE
## 2 variance 5176719 0.5786225 3930975
## 2 extratrees 5449118 0.5767101 4246082
## 29 variance 4917617 0.5897345 3524808
## 29 extratrees 5005317 0.5804470 3570502
## 56 variance 4891802 0.5972117 3503811
## 56 extratrees 4943504 0.5862334 3548744
##
## Tuning parameter 'min.node.size' was held constant at a value of 5
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were mtry = 56, splitrule =
## variance and min.node.size = 5.
\(根據上面的模型訓練,進行預測\)
\(LM prediction\)
# Predict test-set salaries with the linear model, then plot predicted (x)
# against actual (y). The line is actual-vs-actual, i.e. the y = x reference
# on which a perfect prediction would fall.
pred.lm <- predict(model.lm, newdata = test)
ggplot(data = test) +
  geom_point(aes(x = pred.lm, y = salary17_18)) +
  geom_line(aes(x = salary17_18, y = salary17_18)) +
  labs(title = "LM Prediction", x = "Predict Salary", y = "Actual Salary")
\(Lasso prediction\)
# Predict test-set salaries with the lasso fit at its selected penalty, then
# plot predicted (x) against actual (y). The line is actual-vs-actual, i.e.
# the y = x reference on which a perfect prediction would fall.
pred.lasso <- predict(model.lasso, newx = test.mat, s = lambda.best1)
ggplot(data = test) +
  geom_point(aes(x = pred.lasso, y = salary17_18)) +
  geom_line(aes(x = salary17_18, y = salary17_18)) +
  labs(title = "Lasso Prediction", x = "Predict Salary", y = "Actual Salary")
\(Ridge prediction\)
# Predict test-set salaries with the ridge fit at its selected penalty, then
# plot predicted (x) against actual (y). The line is actual-vs-actual, i.e.
# the y = x reference on which a perfect prediction would fall.
pred.ridge <- predict(model.ridge, newx = test.mat, s = lambda.best2)
ggplot(data = test) +
  geom_point(aes(x = pred.ridge, y = salary17_18)) +
  geom_line(aes(x = salary17_18, y = salary17_18)) +
  labs(title = "Ridge Prediction", x = "Predict Salary", y = "Actual Salary")
\(Decision Tree Prediction\)
# Predict test-set salaries with the regression tree, then plot predicted (x)
# against actual (y). The line is actual-vs-actual, i.e. the y = x reference
# on which a perfect prediction would fall.
pred.cart <- predict(model.cart, newdata = test)
ggplot(data = test) +
  geom_point(aes(x = pred.cart, y = salary17_18)) +
  geom_line(aes(x = salary17_18, y = salary17_18)) +
  labs(title = "DT Prediction", x = "Predict Salary", y = "Actual Salary")
\(Random forest\)
# Predict test-set salaries with the tuned random forest, then plot predicted
# (x) against actual (y). The line is actual-vs-actual, i.e. the y = x
# reference on which a perfect prediction would fall.
pred.rf <- predict(model.ranger, newdata = test)
ggplot(data = test) +
  geom_point(aes(x = pred.rf, y = salary17_18)) +
  geom_line(aes(x = salary17_18, y = salary17_18)) +
  labs(title = "RF Prediction", x = "Predict Salary", y = "Actual Salary")
\(計算各預測的R^2為何\)
# Out-of-sample R^2 for each model:
#   1 - mean squared prediction error / mean squared deviation from the mean,
# with both terms computed on the test set.
test.avg <- mean(test[, "salary17_18"])

# Test-set R^2 for one vector (or one-column matrix) of predictions.
test_r_squared <- function(pred) {
  1 - mean((test[, "salary17_18"] - pred)^2) /
    mean((test$salary17_18 - test.avg)^2)
}

R.lm <- test_r_squared(pred.lm)
R.lasso <- test_r_squared(pred.lasso)
R.ridge <- test_r_squared(pred.ridge)
R.dt <- test_r_squared(pred.cart)
R.rf <- test_r_squared(pred.rf)
\(將上述結果畫圖並比較\)
# Bar chart comparing the test-set R^2 of the five models.
barplot(
  c(R.lm, R.lasso, R.ridge, R.dt, R.rf),
  names.arg = c("Lm", "Lasso", "Ridge", "DT", "RF"),
  col = "red",
  main = "Test R-squared"
)
\(選取球員:Nikola Jokic來驗證我們的模型\) \(首先放入LM\)
# Load the stat line of the single player used as an out-of-sample check.
setwd("~/Desktop/R/data")
Jokic <- read.csv(file = "Jokic.csv")
## Warning in read.table(file = file, header = header, sep = sep, quote =
## quote, : incomplete final line found by readTableHeader on 'Jokic.csv'
# Salary figure the predictions are compared against.
Jokic.salary <- 25480000
# Linear-model prediction, printed next to that figure.
pred.lm_Jokic <- predict(model.lm, newdata = Jokic)
c(pred.lm_Jokic, Jokic.salary)
## 1
## 18784786 25480000
\(再來是Ridge\)
# Build the same design-matrix layout used for training (stat columns G
# through SPG, plus the intercept column model.matrix() adds); here the
# response column is named salary18_19.
Jokic.mat <- model.matrix(
  salary18_19 ~ .,
  data = select(Jokic, salary18_19, G:SPG)
)
# Ridge prediction at the selected penalty, printed next to the salary figure.
pred.ridge_Jokic <- predict(model.ridge, newx = Jokic.mat, s = lambda.best2)
c(pred.ridge_Jokic, Jokic.salary)
## [1] 19848899 25480000
\(最後使用RF\)
# Random-forest prediction, printed next to the salary figure.
pred.rf_Jokic <- predict(model.ranger, newdata = Jokic)
c(pred.rf_Jokic, Jokic.salary)
## [1] 22234748 25480000